データの読み込み

library(MASS)
library(DT)
library(car)
# Load the house-price data from the project-relative CSV file.
house_data <- read.csv("data/kc_house_data.csv")

# Show the data as an interactive table with striped/bordered cells,
# per-column filters at the top and the ColReorder extension.
# `autoWidth = TRUE` used to be listed twice in `options`; it is given once.
DT::datatable(
  house_data,
  class = "stripe cell-border",
  filter = "top",
  extensions = "ColReorder",
  options = list(autoWidth = TRUE, dom = "Rlfrtip")
)
It seems your data is too big for client-side DataTables. You may consider server-side processing: https://rstudio.github.io/DT/server.htmlIt seems your data is too big for client-side DataTables. You may consider server-side processing: https://rstudio.github.io/DT/server.html

sqft_aboveを説明変数、priceを被説明変数として線形回帰モデルをやってみる

# Simple linear regression of price on sqft_above.
# Fitted slope: 268.5, intercept: 59953.2
house_lm1 <- lm(price ~ sqft_above, data = house_data)
house_lm1

Call:
lm(formula = price ~ sqft_above, data = house_data)

Coefficients:
(Intercept)   sqft_above  
    59953.2        268.5  
# Summary: R-squared is 0.3667; the p-values of the intercept and the slope are both < 2e-16
summary(house_lm1)

Call:
lm(formula = price ~ sqft_above, data = house_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-913132 -165624  -41468  109327 5339232 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  59953.2     4729.8   12.68   <2e-16 ***
sqft_above     268.5        2.4  111.87   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 292200 on 21611 degrees of freedom
Multiple R-squared:  0.3667,    Adjusted R-squared:  0.3667 
F-statistic: 1.251e+04 on 1 and 21611 DF,  p-value: < 2.2e-16

要約で大切な箇所は、Pr(>|t|) (p値)とMultiple R-squared(決定係数)

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 59953.2 4729.8 12.68 <2e-16 ***
sqft_above 268.5 2.4 111.87 <2e-16 ***

Multiple R-squared: 0.3667, Adjusted R-squared: 0.3667

残差の正規性をヒストグラムでチェック

誤差(ε)は正規分布に従うと仮定していたので、実際の残差が正規分布に従っているかを確認 正規分布にしたがっていないならば、仮定が崩れる。

# Histogram of the house_lm1 residuals with fixed bins of width 1e+05 from -1e+06 to 5.4e+06
hist(house_lm1$residuals, breaks=seq(-1e+06,5.4e+06,1e+05))

残差の正規性をqqプロットでチェック

標準正規分布に変換した場合、どの位離れているか?を確認

## Normal Q-Q plot of the house_lm1 residuals, with a red reference line
qqnorm(house_lm1$residuals)
qqline(house_lm1$residuals, col="red")

正規性が保たれていないので、価格の対数でモデルを作成してみる。

# Refit the simple regression with log(price) as the response and show the
# full model summary.
house_lm2 <- lm(log(price) ~ sqft_above, data = house_data)
summary(house_lm2)

Call:
lm(formula = log(price) ~ sqft_above, data = house_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.39738 -0.31422 -0.00033  0.28155  1.80559 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 1.236e+01  6.810e-03  1815.5   <2e-16 ***
sqft_above  3.828e-04  3.455e-06   110.8   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.4206 on 21611 degrees of freedom
Multiple R-squared:  0.3622,    Adjusted R-squared:  0.3621 
F-statistic: 1.227e+04 on 1 and 21611 DF,  p-value: < 2.2e-16
# Histogram of the house_lm2 residuals with bins of width 0.05 from -2 to 2
hist(house_lm2$residuals,breaks=seq(-2, 2, 0.05))

# Normal Q-Q plot of the house_lm2 residuals, with a red reference line
qqnorm(house_lm2$residuals)
qqline(house_lm2$residuals, col="red")

誤差の正規性は良さそう。

クックの距離

外れ値があるかを調べる

# Keep the log-price model under a separate working name
house_lm2_wip <- house_lm2
# Cook's distance of every observation in that fit
ck_dist <- cooks.distance(house_lm2_wip)
# Show the entry (or entries) with the largest Cook's distance; the printed name is the observation's row label
ck_dist[ck_dist == max(ck_dist)]
     12778 
0.01984297 

クックの距離が最大なのは12778番目のデータで、その値は0.01984297。描画してみる。

plot(house_lm2_wip)

クックの距離が、4/データ数より大きいデータ点はインフルエンス(影響力)が大きいと言われている。 上図だと(1623、18595、12778)。外れ値を除外して、モデルを再構築

# Drop the three rows flagged as influential and refit the log-price model
# on the remaining data.
house_data_wip <- house_data[-c(1623, 18595, 12778), ]
house_lm2_wip <- lm(log(price) ~ sqft_above, data = house_data_wip)

# Recompute Cook's distance for the refitted model. The result used to be
# stored under a misspelled name (`ckck_dist`), so the line below printed the
# stale distances of the original model instead of those of the refit.
ck_dist <- cooks.distance(house_lm2_wip)
# which.max() picks the largest entry without comparing doubles with `==`.
ck_dist[which.max(ck_dist)]
     12778 
0.01984297 
summary(house_lm2_wip)

Call:
lm(formula = log(price) ~ sqft_above, data = house_data_wip)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.3961 -0.3142 -0.0004  0.2817  1.8050 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 1.236e+01  6.823e-03  1811.6   <2e-16 ***
sqft_above  3.842e-04  3.465e-06   110.9   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.4204 on 21608 degrees of freedom
Multiple R-squared:  0.3626,    Adjusted R-squared:  0.3625 
F-statistic: 1.229e+04 on 1 and 21608 DF,  p-value: < 2.2e-16
plot(house_lm2_wip)

今回は、外れ値を除くと、決定係数もクックの距離も大きくなってしまったので、外さない方が良い。

重回帰分析

準備:カテゴリ変数をダミー変数に変換する

waterfront, view, condition, grade を変換する。変換には as.factor() を使い、因子型にする。

# Turn the four categorical columns into factors so that lm() expands them
# into dummy variables.
house_data[c("waterfront", "view", "condition", "grade")] <- lapply(
  house_data[c("waterfront", "view", "condition", "grade")],
  as.factor
)

重回帰分析

id,date,zipcode,lat,long,yr_built,yr_renovated を除いて重回帰分析 説明変数を増やせば、決定係数が上がるので、説明変数を増やすことへのペナルティーを加えた上で分析 分析結果は、数学的に証明されているAICで評価(592752.6)

# Multiple regression of price on every column except id, date, zipcode,
# lat, long, yr_built and yr_renovated; then report the model's AIC.
house_lm3 <- lm(
  price ~ . - id - date - zipcode - lat - long - yr_built - yr_renovated,
  data = house_data
)
AIC(house_lm3)
[1] 592752.6

一方、自由度調整済み決定係数(Adjusted R-squared: 0.6467)は、AICほど厳密な数学的根拠を持たない、経験的に提案された指標。

summary(house_lm3)

Call:
lm(formula = price ~ . - id - date - zipcode - lat - long - yr_built - 
    yr_renovated, data = house_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-1629728  -117831   -18516    90412  4229913 

Coefficients: (1 not defined because of singularities)
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)    6.789e+04  2.183e+05   0.311 0.755857    
bedrooms      -1.746e+04  2.085e+03  -8.374  < 2e-16 ***
bathrooms     -2.431e+03  3.343e+03  -0.727 0.467052    
sqft_living    1.876e+02  4.662e+00  40.230  < 2e-16 ***
sqft_lot       2.075e-02  5.186e-02   0.400 0.689052    
floors         2.254e+04  3.860e+03   5.838 5.36e-09 ***
waterfront1    5.203e+05  2.135e+04  24.367  < 2e-16 ***
view1          1.432e+05  1.221e+04  11.733  < 2e-16 ***
view2          8.365e+04  7.388e+03  11.322  < 2e-16 ***
view3          1.353e+05  1.015e+04  13.324  < 2e-16 ***
view4          2.769e+05  1.567e+04  17.673  < 2e-16 ***
condition2    -1.267e+04  4.396e+04  -0.288 0.773148    
condition3    -1.912e+04  4.090e+04  -0.468 0.640121    
condition4     3.152e+04  4.093e+04   0.770 0.441150    
condition5     1.071e+05  4.117e+04   2.601 0.009291 ** 
grade3         2.751e+04  2.553e+05   0.108 0.914193    
grade4         3.835e+04  2.255e+05   0.170 0.864945    
grade5         2.267e+04  2.222e+05   0.102 0.918741    
grade6         5.815e+04  2.221e+05   0.262 0.793445    
grade7         9.320e+04  2.221e+05   0.420 0.674761    
grade8         1.555e+05  2.221e+05   0.700 0.483866    
grade9         2.874e+05  2.222e+05   1.294 0.195850    
grade10        4.741e+05  2.223e+05   2.132 0.032979 *  
grade11        7.454e+05  2.226e+05   3.348 0.000814 ***
grade12        1.209e+06  2.237e+05   5.404 6.60e-08 ***
grade13        2.498e+06  2.310e+05  10.813  < 2e-16 ***
sqft_above    -5.868e+01  4.644e+00 -12.636  < 2e-16 ***
sqft_basement         NA         NA      NA       NA    
sqft_living15  1.839e+01  3.669e+00   5.013 5.41e-07 ***
sqft_lot15    -7.014e-01  7.906e-02  -8.872  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 218200 on 21584 degrees of freedom
Multiple R-squared:  0.6472,    Adjusted R-squared:  0.6467 
F-statistic:  1414 on 28 and 21584 DF,  p-value: < 2.2e-16

誤差の正規性のチェック

# Histogram of the house_lm3 residuals with bins of width 5e+04 from -1.7e+06 to 6.0e+06
hist(house_lm3$residuals, breaks=seq(-1.7e+06,6.0e+06,5e+04))

qqプロット

# Normal Q-Q plot of the house_lm3 residuals, with a red reference line
qqnorm(house_lm3$residuals)
qqline(house_lm3$residuals, col="red")

正規性がないので、価格に対数( log(price) )を取ってみる。

# Same predictors as house_lm3, but with log(price) as the response.
house_lm4 <- lm(
  log(price) ~ . - id - date - zipcode - lat - long - yr_built - yr_renovated,
  data = house_data
)
summary(house_lm4)

Call:
lm(formula = log(price) ~ . - id - date - zipcode - lat - long - 
    yr_built - yr_renovated, data = house_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.47369 -0.23660  0.01019  0.22325  1.33856 

Coefficients: (1 not defined because of singularities)
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)    1.162e+01  3.318e-01  35.030  < 2e-16 ***
bedrooms      -1.626e-02  3.168e-03  -5.132 2.88e-07 ***
bathrooms     -1.585e-02  5.080e-03  -3.120 0.001814 ** 
sqft_living    2.618e-04  7.085e-06  36.953  < 2e-16 ***
sqft_lot       2.853e-07  7.881e-08   3.621 0.000295 ***
floors         7.108e-02  5.866e-03  12.116  < 2e-16 ***
waterfront1    3.671e-01  3.245e-02  11.315  < 2e-16 ***
view1          2.037e-01  1.855e-02  10.978  < 2e-16 ***
view2          1.349e-01  1.123e-02  12.013  < 2e-16 ***
view3          1.567e-01  1.543e-02  10.159  < 2e-16 ***
view4          2.648e-01  2.381e-02  11.121  < 2e-16 ***
condition2    -4.816e-02  6.680e-02  -0.721 0.470924    
condition3     5.527e-02  6.214e-02   0.889 0.373824    
condition4     1.338e-01  6.219e-02   2.151 0.031468 *  
condition5     2.674e-01  6.256e-02   4.274 1.93e-05 ***
grade3         1.438e-01  3.880e-01   0.371 0.710829    
grade4         2.210e-01  3.426e-01   0.645 0.518868    
grade5         3.019e-01  3.377e-01   0.894 0.371286    
grade6         4.932e-01  3.375e-01   1.461 0.143912    
grade7         6.818e-01  3.375e-01   2.020 0.043378 *  
grade8         8.589e-01  3.375e-01   2.544 0.010951 *  
grade9         1.061e+00  3.377e-01   3.141 0.001685 ** 
grade10        1.224e+00  3.378e-01   3.623 0.000291 ***
grade11        1.347e+00  3.383e-01   3.982 6.85e-05 ***
grade12        1.451e+00  3.399e-01   4.268 1.98e-05 ***
grade13        1.674e+00  3.510e-01   4.768 1.87e-06 ***
sqft_above    -1.203e-04  7.057e-06 -17.047  < 2e-16 ***
sqft_basement         NA         NA      NA       NA    
sqft_living15  8.770e-05  5.575e-06  15.731  < 2e-16 ***
sqft_lot15    -8.186e-07  1.201e-07  -6.814 9.75e-12 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.3316 on 21584 degrees of freedom
Multiple R-squared:  0.6041,    Adjusted R-squared:  0.6036 
F-statistic:  1176 on 28 and 21584 DF,  p-value: < 2.2e-16
# Histogram of the house_lm4 residuals with bins of width 0.05 from -1.6 to 1.6
hist(house_lm4$residuals, breaks=seq(-1.6, 1.6, 0.05))

# Normal Q-Q plot of the house_lm4 residuals, with a red reference line
qqnorm(house_lm4$residuals)
qqline(house_lm4$residuals, col="red")

正規性は保たれたが、summaryを見ると bedrooms -1.626e-02、bathrooms -1.585e-02、sqft_above -1.203e-04 の回帰係数(傾き)がマイナスになっている。これは、ベッド数が増えると価格が下がるという事であり、モデルがおかしそうだ。

多重共線性の確認

説明変数間で相関が強くなっていないか?を確認

vif(house_lm4)
Error in vif.default(house_lm4) : 
  there are aliased coefficients in the model

there are aliased coefficients in the model

とは完全に相関してる項目が存在しているというエラー。 何が、相関しているかを確認

# Refit the same log(price) model inline and list its aliased (linearly dependent) terms
alias(lm(log(price)~.-id-date-zipcode-lat-long-yr_built-yr_renovated, data=house_data))
Model :
log(price) ~ (id + date + bedrooms + bathrooms + sqft_living + 
    sqft_lot + floors + waterfront + view + condition + grade + 
    sqft_above + sqft_basement + yr_built + yr_renovated + zipcode + 
    lat + long + sqft_living15 + sqft_lot15) - id - date - zipcode - 
    lat - long - yr_built - yr_renovated

Complete :
              (Intercept) bedrooms bathrooms sqft_living sqft_lot floors waterfront1 view1 view2 view3
sqft_basement  0           0        0         1           0        0      0           0     0     0   
              view4 condition2 condition3 condition4 condition5 grade3 grade4 grade5 grade6 grade7
sqft_basement  0     0          0          0          0          0      0      0      0      0    
              grade8 grade9 grade10 grade11 grade12 grade13 sqft_above sqft_living15 sqft_lot15
sqft_basement  0      0      0       0       0       0      -1          0             0        

aliasの結果(sqft_livingの係数が1、sqft_aboveの係数が-1)から、sqft_basement = sqft_living - sqft_above という完全な線形関係があることが分かるので、sqft_basementを削除した上で、多重共線性(マルチコ)を確認。VIFは一般に、5以上で要注意、10以上でマルチコありとされる。

# Refit the log(price) model without the aliased sqft_basement term, then
# compute the variance inflation factors.
house_lm4 <- lm(
  log(price) ~ . - id - date - zipcode - lat - long - yr_built -
    yr_renovated - sqft_basement,
  data = house_data
)
vif(house_lm4)
                  GVIF Df GVIF^(1/(2*Df))
bedrooms      1.706847  1        1.306464
bathrooms     3.008411  1        1.734477
sqft_living   8.322593  1        2.884890
sqft_lot      2.094488  1        1.447235
floors        1.972327  1        1.404396
waterfront    1.548928  1        1.244559
view          1.818152  4        1.077591
condition     1.235554  4        1.026793
grade         4.263892 11        1.068138
sqft_above    6.712648  1        2.590878
sqft_living15 2.870156  1        1.694153
sqft_lot15    2.114837  1        1.454248

AICを再確認

AIC(house_lm4)
[1] 13650.99

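13650.99 < 592752.6 (説明変数削除前から、AICが小さくなっている)。ただし、592752.6 は price を、13650.99 は log(price) を被説明変数とするモデルのAICであり、被説明変数が異なるモデル同士のAICは直接比較できないため、この差だけでモデルが良くなったとは言えない点に注意。 続いて、サマリーを再確認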

summary(house_lm4)

Call:
lm(formula = log(price) ~ . - id - date - zipcode - lat - long - 
    yr_built - yr_renovated - sqft_basement, data = house_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.47369 -0.23660  0.01019  0.22325  1.33856 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)    1.162e+01  3.318e-01  35.030  < 2e-16 ***
bedrooms      -1.626e-02  3.168e-03  -5.132 2.88e-07 ***
bathrooms     -1.585e-02  5.080e-03  -3.120 0.001814 ** 
sqft_living    2.618e-04  7.085e-06  36.953  < 2e-16 ***
sqft_lot       2.853e-07  7.881e-08   3.621 0.000295 ***
floors         7.108e-02  5.866e-03  12.116  < 2e-16 ***
waterfront1    3.671e-01  3.245e-02  11.315  < 2e-16 ***
view1          2.037e-01  1.855e-02  10.978  < 2e-16 ***
view2          1.349e-01  1.123e-02  12.013  < 2e-16 ***
view3          1.567e-01  1.543e-02  10.159  < 2e-16 ***
view4          2.648e-01  2.381e-02  11.121  < 2e-16 ***
condition2    -4.816e-02  6.680e-02  -0.721 0.470924    
condition3     5.527e-02  6.214e-02   0.889 0.373824    
condition4     1.338e-01  6.219e-02   2.151 0.031468 *  
condition5     2.674e-01  6.256e-02   4.274 1.93e-05 ***
grade3         1.438e-01  3.880e-01   0.371 0.710829    
grade4         2.210e-01  3.426e-01   0.645 0.518868    
grade5         3.019e-01  3.377e-01   0.894 0.371286    
grade6         4.932e-01  3.375e-01   1.461 0.143912    
grade7         6.818e-01  3.375e-01   2.020 0.043378 *  
grade8         8.589e-01  3.375e-01   2.544 0.010951 *  
grade9         1.061e+00  3.377e-01   3.141 0.001685 ** 
grade10        1.224e+00  3.378e-01   3.623 0.000291 ***
grade11        1.347e+00  3.383e-01   3.982 6.85e-05 ***
grade12        1.451e+00  3.399e-01   4.268 1.98e-05 ***
grade13        1.674e+00  3.510e-01   4.768 1.87e-06 ***
sqft_above    -1.203e-04  7.057e-06 -17.047  < 2e-16 ***
sqft_living15  8.770e-05  5.575e-06  15.731  < 2e-16 ***
sqft_lot15    -8.186e-07  1.201e-07  -6.814 9.75e-12 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.3316 on 21584 degrees of freedom
Multiple R-squared:  0.6041,    Adjusted R-squared:  0.6036 
F-statistic:  1176 on 28 and 21584 DF,  p-value: < 2.2e-16

マルチコを解消したが、p値で棄却できない項目(condition、grade)を説明変数から外す。

# Log(price) model with condition and grade removed in addition to the
# columns already excluded from house_lm4.
house_lm5 <- lm(
  log(price) ~ . - id - date - zipcode - lat - long - yr_built -
    yr_renovated - sqft_basement - condition - grade,
  data = house_data
)
summary(house_lm5)

Call:
lm(formula = log(price) ~ . - id - date - zipcode - lat - long - 
    yr_built - yr_renovated - sqft_basement - condition - grade, 
    data = house_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.45489 -0.26683  0.01249  0.24625  1.60821 

Coefficients:
                Estimate Std. Error  t value Pr(>|t|)    
(Intercept)    1.207e+01  1.205e-02 1001.212  < 2e-16 ***
bedrooms      -3.423e-02  3.293e-03  -10.393  < 2e-16 ***
bathrooms      1.990e-02  5.320e-03    3.741 0.000184 ***
sqft_living    3.478e-04  7.383e-06   47.112  < 2e-16 ***
sqft_lot       2.687e-07  8.466e-08    3.173 0.001508 ** 
floors         1.044e-01  6.026e-03   17.326  < 2e-16 ***
waterfront1    3.213e-01  3.485e-02    9.220  < 2e-16 ***
view1          2.175e-01  1.994e-02   10.905  < 2e-16 ***
view2          1.647e-01  1.205e-02   13.664  < 2e-16 ***
view3          1.950e-01  1.656e-02   11.780  < 2e-16 ***
view4          3.314e-01  2.547e-02   13.010  < 2e-16 ***
sqft_above    -9.795e-05  7.393e-06  -13.250  < 2e-16 ***
sqft_living15  1.682e-04  5.669e-06   29.674  < 2e-16 ***
sqft_lot15    -1.035e-06  1.291e-07   -8.017 1.14e-15 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.3567 on 21599 degrees of freedom
Multiple R-squared:  0.5417,    Adjusted R-squared:  0.5414 
F-statistic:  1964 on 13 and 21599 DF,  p-value: < 2.2e-16
AIC(house_lm5)
[1] 16786.6
vif(house_lm5)
                  GVIF Df GVIF^(1/(2*Df))
bedrooms      1.594029  1        1.262549
bathrooms     2.852491  1        1.688932
sqft_living   7.811808  1        2.794961
sqft_lot      2.089329  1        1.445451
floors        1.798878  1        1.341223
waterfront    1.544181  1        1.242651
view          1.784447  4        1.075073
sqft_above    6.367104  1        2.523312
sqft_living15 2.565055  1        1.601579
sqft_lot15    2.109817  1        1.452521

AIC(16786.6)は大きくなり、bedroomsとsqft_aboveの説明がつかないが全体的にはよくなった。

ステップワイズ法で、モデルの最適化を試す。

stepとAICではAICの値が変わるが、step関数内で使用しているAICは定数項を除いたAIC(extractAIC)。 どちらでも意味合いとしては同じ。

# Stepwise model selection by AIC, starting from the full price model house_lm3
house_lm6<-step(house_lm3)
Start:  AIC=531415.5
price ~ (id + date + bedrooms + bathrooms + sqft_living + sqft_lot + 
    floors + waterfront + view + condition + grade + sqft_above + 
    sqft_basement + yr_built + yr_renovated + zipcode + lat + 
    long + sqft_living15 + sqft_lot15) - id - date - zipcode - 
    lat - long - yr_built - yr_renovated


Step:  AIC=531415.5
price ~ bedrooms + bathrooms + sqft_living + sqft_lot + floors + 
    waterfront + view + condition + grade + sqft_above + sqft_living15 + 
    sqft_lot15

                Df  Sum of Sq        RSS    AIC
- sqft_lot       1 7.6241e+09 1.0277e+15 531414
- bathrooms      1 2.5187e+10 1.0278e+15 531414
<none>                        1.0277e+15 531416
- sqft_living15  1 1.1964e+12 1.0289e+15 531439
- floors         1 1.6228e+12 1.0294e+15 531448
- bedrooms       1 3.3391e+12 1.0311e+15 531484
- sqft_lot15     1 3.7476e+12 1.0315e+15 531492
- sqft_above     1 7.6023e+12 1.0353e+15 531573
- condition      4 2.7015e+13 1.0548e+15 531968
- waterfront     1 2.8272e+13 1.0560e+15 532000
- view           4 2.9709e+13 1.0574e+15 532023
- sqft_living    1 7.7063e+13 1.1048e+15 532976
- grade         11 2.0609e+14 1.2338e+15 535344

Step:  AIC=531413.7
price ~ bedrooms + bathrooms + sqft_living + floors + waterfront + 
    view + condition + grade + sqft_above + sqft_living15 + sqft_lot15

                Df  Sum of Sq        RSS    AIC
- bathrooms      1 2.5070e+10 1.0278e+15 531412
<none>                        1.0277e+15 531414
- sqft_living15  1 1.1895e+12 1.0289e+15 531437
- floors         1 1.6167e+12 1.0294e+15 531446
- bedrooms       1 3.3470e+12 1.0311e+15 531482
- sqft_lot15     1 6.9010e+12 1.0346e+15 531556
- sqft_above     1 7.5972e+12 1.0353e+15 531571
- condition      4 2.7014e+13 1.0548e+15 531966
- waterfront     1 2.8266e+13 1.0560e+15 531998
- view           4 2.9742e+13 1.0575e+15 532022
- sqft_living    1 7.7093e+13 1.1048e+15 532975
- grade         11 2.0610e+14 1.2338e+15 535342

Step:  AIC=531412.2
price ~ bedrooms + sqft_living + floors + waterfront + view + 
    condition + grade + sqft_above + sqft_living15 + sqft_lot15

                Df  Sum of Sq        RSS    AIC
<none>                        1.0278e+15 531412
- sqft_living15  1 1.1933e+12 1.0290e+15 531435
- floors         1 1.6571e+12 1.0294e+15 531445
- bedrooms       1 3.5473e+12 1.0313e+15 531485
- sqft_lot15     1 6.8930e+12 1.0347e+15 531555
- sqft_above     1 7.5973e+12 1.0354e+15 531569
- condition      4 2.7131e+13 1.0549e+15 531967
- waterfront     1 2.8261e+13 1.0560e+15 531996
- view           4 2.9777e+13 1.0576e+15 532021
- sqft_living    1 8.9610e+13 1.1174e+15 533217
- grade         11 2.0637e+14 1.2341e+15 535345
summary(house_lm6)

Call:
lm(formula = price ~ bedrooms + sqft_living + floors + waterfront + 
    view + condition + grade + sqft_above + sqft_living15 + sqft_lot15, 
    data = house_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-1632940  -117737   -18626    90275  4233342 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)    6.907e+04  2.183e+05   0.316 0.751734    
bedrooms      -1.773e+04  2.054e+03  -8.631  < 2e-16 ***
sqft_living    1.863e+02  4.294e+00  43.383  < 2e-16 ***
floors         2.160e+04  3.661e+03   5.899 3.70e-09 ***
waterfront1    5.202e+05  2.135e+04  24.363  < 2e-16 ***
view1          1.433e+05  1.221e+04  11.738  < 2e-16 ***
view2          8.378e+04  7.386e+03  11.343  < 2e-16 ***
view3          1.354e+05  1.014e+04  13.352  < 2e-16 ***
view4          2.770e+05  1.567e+04  17.683  < 2e-16 ***
condition2    -1.283e+04  4.396e+04  -0.292 0.770318    
condition3    -1.973e+04  4.089e+04  -0.483 0.629354    
condition4     3.114e+04  4.092e+04   0.761 0.446728    
condition5     1.065e+05  4.116e+04   2.588 0.009658 ** 
grade3         2.791e+04  2.553e+05   0.109 0.912958    
grade4         3.740e+04  2.254e+05   0.166 0.868230    
grade5         2.196e+04  2.222e+05   0.099 0.921278    
grade6         5.735e+04  2.221e+05   0.258 0.796204    
grade7         9.181e+04  2.221e+05   0.413 0.679291    
grade8         1.538e+05  2.221e+05   0.692 0.488769    
grade9         2.858e+05  2.222e+05   1.286 0.198417    
grade10        4.723e+05  2.223e+05   2.125 0.033614 *  
grade11        7.434e+05  2.226e+05   3.340 0.000839 ***
grade12        1.207e+06  2.237e+05   5.395 6.91e-08 ***
grade13        2.495e+06  2.309e+05  10.802  < 2e-16 ***
sqft_above    -5.820e+01  4.608e+00 -12.632  < 2e-16 ***
sqft_living15  1.834e+01  3.663e+00   5.006 5.60e-07 ***
sqft_lot15    -6.788e-01  5.641e-02 -12.032  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 218200 on 21586 degrees of freedom
Multiple R-squared:  0.6472,    Adjusted R-squared:  0.6467 
F-statistic:  1523 on 26 and 21586 DF,  p-value: < 2.2e-16
AIC(house_lm6)
[1] 592749.2

予測

単純にする為、price-sqft_aboveの単回帰分析を考える

# Simple regression of price on sqft_above, used for the prediction examples.
# NOTE(review): this reuses the name house_lm6 and so replaces the
# stepwise-selected model that was assigned to house_lm6 earlier.
house_lm6<-lm(price~sqft_above, data=house_data)
house_lm6

Call:
lm(formula = price ~ sqft_above, data = house_data)

Coefficients:
(Intercept)   sqft_above  
    59953.2        268.5  
# Scatter plot of price against sqft_above, with the fitted regression line drawn in red
plot(house_data$sqft_above, house_data$price)
abline(house_lm6, col="red")

# sqft_above values at which prices will be predicted.
new_data <- data.frame(sqft_above = seq(2000, 8000, by = 2000))
new_data

回帰直線上の値を求める場合

predict(house_lm6, newdata = new_data)
        1         2         3         4 
 596899.6 1133845.9 1670792.3 2207738.7 

信頼区間も含めて求める場合

# Point predictions together with confidence intervals for the fitted line.
# The interval type must be quoted with plain double quotes; the typographic
# quotes that were here do not parse as an R string.
predict(house_lm6, newdata = new_data, interval = "confidence")

予測区間も含めて求める場合

# Point predictions together with prediction intervals for new observations.
# Plain double quotes replace the typographic ones (which do not parse), and
# "prediction" is spelled out instead of relying on partial matching.
predict(house_lm6, newdata = new_data, interval = "prediction")

可視化してみましょう

# Scatter plot of price against sqft_above with the fitted line, using wider
# axis limits so that the interval bands drawn below are visible.
plot(
  house_data$sqft_above, house_data$price,
  xlim = c(-1000, 12000), ylim = c(-1000, 4e+6)
)
abline(house_lm6, col = "red")

# Grid of sqft_above values on which both bands are evaluated.
sqft_above_seq <- -1000:12000
new_data2 <- data.frame(sqft_above = sqft_above_seq)

# Confidence band for the regression line (blue, dashed).
conf_interval <- predict(
  house_lm6,
  newdata = new_data2, interval = "confidence"
)
lines(sqft_above_seq, conf_interval[, "lwr"], col = "blue", lty = 2)
lines(sqft_above_seq, conf_interval[, "upr"], col = "blue", lty = 2)

# Prediction band for new observations (green, dashed). Uses house_lm6 like
# the rest of this section; house_lm1 has the same formula and data.
pred_interval <- predict(
  house_lm6,
  newdata = new_data2, interval = "prediction"
)
lines(sqft_above_seq, pred_interval[, "lwr"], col = "green", lty = 2)
lines(sqft_above_seq, pred_interval[, "upr"], col = "green", lty = 2)

(参考)機械学習的アプローチ 学習データとテストデータに分割して精度検証

学習データとテストデータに分割します

# Split the rows 70/30 into a training set and a test set.
# A fixed seed makes the random split, and therefore the reported error,
# reproducible from run to run.
set.seed(42)
# seq_len() is safe for an empty data frame and floor() makes the sample
# size an explicit whole number.
train_idx <- sample(
  seq_len(nrow(house_data)),
  size = floor(nrow(house_data) * 0.7)
)
train <- house_data[train_idx, ]
test <- house_data[-train_idx, ]

# Fit the price model on the training rows only.
mymodel <- lm(
  price ~ bedrooms + floors + waterfront + view + condition + yr_built +
    yr_renovated + sqft_living15 + sqft_lot15,
  data = train
)

# Evaluate on the held-out rows: mean squared error and its square root.
ypred <- predict(mymodel, newdata = test)
mse <- mean((test$price - ypred)^2)
mse
rmse <- sqrt(mse)
rmse

LS0tCnRpdGxlOiAi57ea5b2i5Zue5biw5YiG5p6QIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgojIOODh+ODvOOCv+OBruiqreOBv+i+vOOBvwoKYGBge3J9CmxpYnJhcnkoTUFTUykKbGlicmFyeShEVCkKbGlicmFyeShjYXIpCgpob3VzZV9kYXRhPC1yZWFkLmNzdigiZGF0YS9rY19ob3VzZV9kYXRhLmNzdiIpCgpEVDo6ZGF0YXRhYmxlKGhvdXNlX2RhdGEsIGNsYXNzID0gInN0cmlwZSBjZWxsLWJvcmRlciIsIGZpbHRlciA9ICd0b3AnLCBleHRlbnNpb25zID0gJ0NvbFJlb3JkZXInLCBvcHRpb25zID0gbGlzdChhdXRvV2lkdGggPSBUUlVFLCBkb20gPSAnUmxmcnRpcCcsIGF1dG9XaWR0aCA9IFRSVUUpKQpgYGAKCiMgc3FmdF9hYm92ZeOCkuiqrOaYjuWkieaVsOOAgXByaWNl44KS6KKr6Kqs5piO5aSJ5pWw44Go44GX44Gm57ea5b2i5Zue5biw44Oi44OH44Or44KS44KE44Gj44Gm44G/44KLCmBgYHtyfQojIOWCvuOBjToyNjguNeOAgeWIh+eJhzo1OTk1My4yCmhvdXNlX2xtMTwtbG0ocHJpY2V+c3FmdF9hYm92ZSwgZGF0YT1ob3VzZV9kYXRhKQpob3VzZV9sbTEKYGBgCgpgYGB7cn0KIyDjgrXjg57jg6rjg7zvvJrmsbrlrprkv4LmlbDvvJowLjM2NjfjgIHliIfniYfjgajjgIHlgr7jgY3jga5w5YCk44GvMmUtMTYKc3VtbWFyeShob3VzZV9sbTEpCmBgYAropoHntITjgaflpKfliIfjgarnrofmiYDjga/jgIFQcig+fHR8KSDvvIhw5YCk77yJ44GoTXVsdGlwbGUgUi1zcXVhcmVk77yI5rG65a6a5L+C5pWw77yJCgo+IENvZWZmaWNpZW50czogIAo+ICAgICAgICAgICAgIEVzdGltYXRlIFN0ZC4gRXJyb3IgdCB2YWx1ZSBQcig+fHR8KSAgICAKPiAoSW50ZXJjZXB0KSAgNTk5NTMuMiAgICAgNDcyOS44ICAgMTIuNjggICA8MmUtMTYgKioqICAKPiBzcWZ0X2Fib3ZlICAgICAyNjguNSAgICAgICAgMi40ICAxMTEuODcgICA8MmUtMTYgKioqICAKPgo+IE11bHRpcGxlIFItc3F1YXJlZDogIDAuMzY2NywJQWRqdXN0ZWQgUi1zcXVhcmVkOiAgMC4zNjY3IAoKCiMjIOaui+W3ruOBruato+imj+aAp+OCkuODkuOCueODiOOCsOODqeODoOOBp+ODgeOCp+ODg+OCrwroqqTlt67vvIjOte+8ieOBr+ato+imj+WIhuW4g+OBq+W+k+OBhuOBqOS7ruWumuOBl+OBpuOBhOOBn+OBruOBp+OAgeWun+mam+OBruaui+W3ruOBjOato+imj+WIhuW4g+OBq+W+k+OBo+OBpuOBhOOCi+OBi+OCkueiuuiqjQrmraPopo/liIbluIPjgavjgZfjgZ/jgYzjgaPjgabjgYTjgarjgYTjgarjgonjgbDjgIHku67lrprjgYzltKnjgozjgovjgIIKYGBge3J9Cmhpc3QoaG91c2VfbG0xJHJlc2lkdWFscywgYnJlYWtzPXNlcSgtMWUrMDYsNS40ZSswNiwxZSswNSkpCmBgYAoKIyMg5q6L5beu44Gu5q2j6KaP5oCn44KScXHjg5fjg63jg4Pjg4jjgafjg4Hjgqfjg4Pjgq8K5qiZ5rqW5q2j6KaP5YiG5biD44Gr5aSJ5o+b44GX44Gf5aC05ZCI44CB44Gp44Gu5L2N6Zui44KM44Gm44GE44KL44GL77yf44KS56K66KqNCmBgYHtyfQojIyBxceODl+ODreODg+ODiApx
cW5vcm0oaG91c2VfbG0xJHJlc2lkdWFscykKcXFsaW5lKGhvdXNlX2xtMSRyZXNpZHVhbHMsIGNvbD0icmVkIikKYGBgCuato+imj+aAp+OBjOS/neOBn+OCjOOBpuOBhOOBquOBhOOBruOBp+OAgeS+oeagvOOBruWvvuaVsOOBp+ODouODh+ODq+OCkuS9nOaIkOOBl+OBpuOBv+OCi+OAggoKYGBge3J9CmhvdXNlX2xtMjwtbG0obG9nKHByaWNlKX5zcWZ0X2Fib3ZlLCBkYXRhPWhvdXNlX2RhdGEpCnN1bW1hcnkoaG91c2VfbG0yKQpgYGAKCmBgYHtyfQpoaXN0KGhvdXNlX2xtMiRyZXNpZHVhbHMsYnJlYWtzPXNlcSgtMiwgMiwgMC4wNSkpCmBgYAoKYGBge3J9CnFxbm9ybShob3VzZV9sbTIkcmVzaWR1YWxzKQpxcWxpbmUoaG91c2VfbG0yJHJlc2lkdWFscywgY29sPSJyZWQiKQpgYGAK6Kqk5beu44Gu5q2j6KaP5oCn44Gv6Imv44GV44Gd44GG44CCCu+8g+OCr+ODg+OCr+OBrui3nembogrlpJbjgozlgKTjgYzjgYLjgovjgYvjgpLoqr/jgbnjgosKYGBge3J9CmhvdXNlX2xtMl93aXAgPC0gaG91c2VfbG0yCmNrX2Rpc3QgPC0gY29va3MuZGlzdGFuY2UoaG91c2VfbG0yX3dpcCkKY2tfZGlzdFtja19kaXN0ID09IG1heChja19kaXN0KV0KYGBgCue3j+i3nembouOBrzEyNzc444CB5pyA44KC5aSW44KM44Gm44GE44KL44Gu44GvMC4wMTk4NDI5NyAK5o+P55S744GX44Gm44G/44KL44CCCgpgYGB7cn0KcGxvdChob3VzZV9sbTJfd2lwKQpgYGAK44Kv44OD44Kv9I+wgOi3nembouOBjOOAgTQv44OH44O844K/5pWw44KI44KK5aSn44GN44GE44OH44O844K/54K59I+wgeOBr+OCpOODs+ODleODq+OCqOODs+OCueOBjOWkp+OBjeOBhOOBqOiogOOCj+OCjOOBpuOBhOOCi+OAggrkuIrlm7PjgaDjgajvvIgxNjIz44CBMTg1OTXjgIExMjc3OO+8ieOAguWkluOCjOWApOOCkumZpOWkluOBl+OBpuOAgeODouODh+ODq+OCkuWGjeani+eviQpgYGB7cn0KaG91c2VfZGF0YV93aXAgPC0gaG91c2VfZGF0YVstYygxNjIzLCAxODU5NSwgMTI3NzgpLCBdCmhvdXNlX2xtMl93aXA8LWxtKGxvZyhwcmljZSl+c3FmdF9hYm92ZSwgZGF0YT1ob3VzZV9kYXRhX3dpcCkKCmNrY2tfZGlzdCA8LSBjb29rcy5kaXN0YW5jZShob3VzZV9sbTJfd2lwKQpja19kaXN0W2NrX2Rpc3QgPT0gbWF4KGNrX2Rpc3QpXQpzdW1tYXJ5KGhvdXNlX2xtMl93aXApCmBgYApgYGB7cn0KcGxvdChob3VzZV9sbTJfd2lwKQpgYGAK5LuK5Zue44Gv44CB5aSW44KM5YCk44KS6Zmk44GP44Go44CB5rG65a6a5L+C5pWw44KC44Kv44OD44Kv44Gu6Led6Zui44Gu5aSn44GN44GP44Gq44Gj44Gm44GX44G+44Gj44Gf44Gu44Gn44CB5aSW44GV44Gq44GE5pa544GM6Imv44GE44CCCgojIOmHjeWbnuW4sOWIhuaekAoKIyMg5rqW5YKZ77ya44Kr44OG44K044Oq5aSJ5pWw44KS44OA44Of44O85aSJ5pWw44Gr5aSJ5o+b44GZ44KLCndhdGVyZnJvbnQsdmlldyxjb25kaXRpb24sZ3JhZGXjgpLlpInmj5vjgZnjgovjgILlpInmj5vjga8KPiBhcy5mYWN0b3LvvIjlm6DlrZDlnovvvInjgavl
pInmj5vjgZnjgosKCmBgYHtyfQpob3VzZV9kYXRhJHdhdGVyZnJvbnQ8LWFzLmZhY3Rvcihob3VzZV9kYXRhJHdhdGVyZnJvbnQpCmhvdXNlX2RhdGEkdmlldzwtYXMuZmFjdG9yKGhvdXNlX2RhdGEkdmlldykKaG91c2VfZGF0YSRjb25kaXRpb248LWFzLmZhY3Rvcihob3VzZV9kYXRhJGNvbmRpdGlvbikKaG91c2VfZGF0YSRncmFkZTwtYXMuZmFjdG9yKGhvdXNlX2RhdGEkZ3JhZGUpCmBgYAoKIyMg6YeN5Zue5biw5YiG5p6QCmlkLGRhdGUsemlwY29kZSxsYXQsbG9uZyx5cl9idWlsdCx5cl9yZW5vdmF0ZWQg44KS6Zmk44GE44Gm6YeN5Zue5biw5YiG5p6QCuiqrOaYjuWkieaVsOOCkuWil+OChOOBm+OBsOOAgeaxuuWumuS/guaVsOOBjOS4iuOBjOOCi+OBruOBp+OAgeiqrOaYjuWkieaVsOOCkuWil+OChOOBmeOBk+OBqOOBuOOBruODmuODiuODq+ODhuOCo+ODvOOCkuWKoOOBiOOBn+S4iuOBp+WIhuaekArliIbmnpDntZDmnpzjga/jgIHmlbDlrabnmoTjgavoqLzmmI7jgZXjgozjgabjgYTjgotBSUPjgafoqZXkvqHvvIg1OTI3NTIuNu+8iQpgYGB7cn0KaG91c2VfbG0zPC1sbShwcmljZX4uLWlkLWRhdGUtemlwY29kZS1sYXQtbG9uZy15cl9idWlsdC15cl9yZW5vdmF0ZWQsIGRhdGE9aG91c2VfZGF0YSkKQUlDKGhvdXNlX2xtMykKYGBgCuS4gOaWueOAgeiHqueUseW6puiqv+aVtOa4iOOBv+axuuWumuS/guaVsO+8iEFkanVzdGVkIFItc3F1YXJlZDogIDAuNjQ2N++8ieOBr+aVsOWtpueahOOBquagueaLoOOBjOaPkOahiOOAggpgYGB7cn0Kc3VtbWFyeShob3VzZV9sbTMpCmBgYAojIyMg6Kqk5beu44Gu5q2j6KaP5oCn44Gu44OB44Kn44OD44KvCmBgYHtyfQpoaXN0KGhvdXNlX2xtMyRyZXNpZHVhbHMsIGJyZWFrcz1zZXEoLTEuN2UrMDYsNi4wZSswNiw1ZSswNCkpCmBgYAoKcXHjg5fjg63jg4Pjg4gKYGBge3J9CnFxbm9ybShob3VzZV9sbTMkcmVzaWR1YWxzKQpxcWxpbmUoaG91c2VfbG0zJHJlc2lkdWFscywgY29sPSJyZWQiKQpgYGAK5q2j6KaP5oCn44GM44Gq44GE44Gu44Gn44CB5L6h5qC844Gr5a++5pWwKCBsb2cocHJpY2UpICnjgpLlj5bjgaPjgabjgb/jgovjgIIKYGBge3J9CmhvdXNlX2xtNDwtbG0obG9nKHByaWNlKX4uLWlkLWRhdGUtemlwY29kZS1sYXQtbG9uZy15cl9idWlsdC15cl9yZW5vdmF0ZWQsIGRhdGE9aG91c2VfZGF0YSkKc3VtbWFyeShob3VzZV9sbTQpCmBgYAoKCmBgYHtyfQpoaXN0KGhvdXNlX2xtNCRyZXNpZHVhbHMsIGJyZWFrcz1zZXEoLTEuNiwgMS42LCAwLjA1KSkKYGBgCgpgYGB7cn0KcXFub3JtKGhvdXNlX2xtNCRyZXNpZHVhbHMpCnFxbGluZShob3VzZV9sbTQkcmVzaWR1YWxzLCBjb2w9InJlZCIpCmBgYAoKCuato+imj+aAp+OBr+S/neOBn+OCjOOBn+OBjOOAgXN1bW11cnnjgpLopovjgovjgagKIGJlZHJvb21zICAgICAgLTEuNjI2ZS0wMgogYmF0aHJvb21zICAgICAtMS41ODVlLTAyCiBzcWZ0X2Fib3ZlICAgIC0xLjIwM2UtMDQK44Gu5Zue5biw5L+C5pWw77yI5YK+44GN77yJ
44GM44Oe44Kk44OK44K544Gr44Gq44Gj44Gm44GE44KL44CC44GT44KM44Gv44CB44OZ44OD44OJ5pWw44GM5bCR44Gq44GP44Gq44KL44Go44CB5L6h5qC844GM5LiL44GM44KL44Go44GE44GG5LqL44Gn44GC44KK44CB44Oi44OH44Or44GM44GK44GL44GX44Gd44GG44Gg44CCCgojIyMg5aSa6YeN5YWx57ea5oCn44Gu56K66KqNCuiqrOaYjuWkieaVsOmWk+OBp+ebuOmWouOBjOW8t+OBj+OBquOBo+OBpuOBhOOBquOBhOOBi++8n+OCkueiuuiqjQpgYGB7cn0KdmlmKGhvdXNlX2xtNCkKYGBgCj4gdGhlcmUgYXJlIGFsaWFzZWQgY29lZmZpY2llbnRzIGluIHRoZSBtb2RlbCAKCuOBqOOBr+WujOWFqOOBq+ebuOmWouOBl+OBpuOCi+mgheebruOBjOWtmOWcqOOBl+OBpuOBhOOCi+OBqOOBhOOBhuOCqOODqeODvOOAggrkvZXjgYzjgIHnm7jplqLjgZfjgabjgYTjgovjgYvjgpLnorroqo0KCmBgYHtyfQphbGlhcyhsbShsb2cocHJpY2Upfi4taWQtZGF0ZS16aXBjb2RlLWxhdC1sb25nLXlyX2J1aWx0LXlyX3Jlbm92YXRlZCwgZGF0YT1ob3VzZV9kYXRhKSkKYGBgCnNxZnRfYmFzZW1lbnTjgahzcWZ0X2xpdmluZ+OAgXNxZnRfYWJvdmXjga7nm7jplqLkv4LmlbDjgYzCsTHjgarjga7jgafjgIFzcWZ0X2Jhc2VtZW5044KS5YmK6Zmk44GX44Gf5LiK44Gn44CB5aSa6YeN5YWx57ea5oCn77yI44Oe44Or44OB44Kz77yJ44KS56K66KqNCuS4gOiIrOOBq+OAgTXku6XkuIrjgafopoHms6jmhI/jgIIxMOS7peS4iuOBp+ODnuODq+ODgeOCs+OBguOCiuOBqOOBquOCiwpgYGB7cn0KaG91c2VfbG00PC1sbShsb2cocHJpY2Upfi4taWQtZGF0ZS16aXBjb2RlLWxhdC1sb25nLXlyX2J1aWx0LXlyX3Jlbm92YXRlZC1zcWZ0X2Jhc2VtZW50LGRhdGE9aG91c2VfZGF0YSkKdmlmKGhvdXNlX2xtNCkKYGBgCkFJQ+OCkuWGjeeiuuiqjQpgYGB7cn0KQUlDKGhvdXNlX2xtNCkKYGBgCiAxMzY1MC45OSA8IDU5Mjc1Mi42ICjoqqzmmI7lpInmlbDliYrpmaTliY3jgYvjgonjgIFBSUPjgYzlsI/jgZXjgY/jgarjgaPjgabjgYTjgos944Oi44OH44Or44GM6Imv44GP44Gq44Gj44Gm44GE44KL77yJCiDntprjgYTjgabjgIHjgrXjg57jg6rjg7zjgpLlho3norroqo0KYGBge3J9CnN1bW1hcnkoaG91c2VfbG00KQpgYGAKIOODnuODq+ODgeOCs+OCkuino+a2iOOBl+OBn+OBjOOAgXDnm7Tjgafmo4TljbTjgafjgY3jgarjgYTpoIXnm67vvIhjb25kaXRpb27jgIFncmFkZe+8ieOCkuiqrOaYjuWkieaVsOOBi+OCieWkluOBmeOAggpgYGB7cn0KaG91c2VfbG01PC1sbShsb2cocHJpY2Upfi4taWQtZGF0ZS16aXBjb2RlLWxhdC1sb25nLXlyX2J1aWx0LXlyX3Jlbm92YXRlZC1zcWZ0X2Jhc2VtZW50LWNvbmRpdGlvbi1ncmFkZSxkYXRhPWhvdXNlX2RhdGEpCnN1bW1hcnkoaG91c2VfbG01KQpBSUMoaG91c2VfbG01KQp2aWYoaG91c2VfbG01KQpgYGAKQUlD77yIMTY3ODYuNu+8ieOBr+Wkp+OBjeOBj+OBquOCiuOAgWJlZHJvb21z44Goc3FmdF9hYm92ZeOBruiq
rOaYjuOBjOOBpOOBi+OBquOBhOOBjOWFqOS9k+eahOOBq+OBr+OCiOOBj+OBquOBo+OBn+OAggoKIyDjgrnjg4bjg4Pjg5fjg6/jgqTjgrrmlrnjgafjgIHjg6Ljg4fjg6vjga7mnIDpganljJbjgpLoqabjgZnjgIIKc3RlcOOBqEFJQ+OBp+OBr0FJQ+OBruebtOOBjOWkieOCj+OCi+OBjOOAgXN0ZXDplqLmlbDlhoXjgafkvb/nlKjjgZfjgabjgYTjgotBSUPjga/lrprmlbDpoIXjgpLpmaTjgYTjgZ9BSUMoZXh0cmFjdEFJQynjgIIK44Gp44Gh44KJ44Gn44KC5oSP5ZGz5ZCI44GE44Go44GX44Gm44Gv5ZCM44GY44CCCmBgYHtyfQpob3VzZV9sbTY8LXN0ZXAoaG91c2VfbG0zKQpzdW1tYXJ5KGhvdXNlX2xtNikKQUlDKGhvdXNlX2xtNikKYGBgCgoKIyDkuojmuKwK5Y2Y57SU44Gr44GZ44KL54K644CBcHJpY2Utc3FmdF9hYm92ZeOBruWNmOWbnuW4sOWIhuaekOOCkuiAg+OBiOOCiwpgYGB7cn0KaG91c2VfbG02PC1sbShwcmljZX5zcWZ0X2Fib3ZlLCBkYXRhPWhvdXNlX2RhdGEpCmhvdXNlX2xtNgpgYGAKCmBgYHtyfQpwbG90KGhvdXNlX2RhdGEkc3FmdF9hYm92ZSwgaG91c2VfZGF0YSRwcmljZSkKYWJsaW5lKGhvdXNlX2xtNiwgY29sPSJyZWQiKQpgYGAKCmBgYHtyfQpuZXdfZGF0YTwtZGF0YS5mcmFtZShzcWZ0X2Fib3ZlPWMoMjAwMCw0MDAwLDYwMDAsODAwMCkpCm5ld19kYXRhCmBgYAoKIyMg5Zue5biw55u057ea5LiK44Gu5YCk44KS5rGC44KB44KL5aC05ZCICmBgYHtyfQpwcmVkaWN0KGhvdXNlX2xtNiwgbmV3ZGF0YSA9IG5ld19kYXRhKQpgYGAKCgoKIyMg5L+h6aC85Yy66ZaT44KC5ZCr44KB44Gm5rGC44KB44KL5aC05ZCICnByZWRpY3QoaG91c2VfbG02LCBuZXdkYXRhID0gbmV3X2RhdGEsIGludGVydmFsID0gImNvbmZpZGVuY2UiKQoKIyMg5L+h6aC85Yy66ZaT44KC5ZCr44KB44Gm5rGC44KB44KL5aC05ZCICnByZWRpY3QoaG91c2VfbG02LCBuZXdkYXRhID0gbmV3X2RhdGEsIGludGVydmFsID0gInByZWRpY3QiKQoKI+WPr+imluWMluOBl+OBpuOBv+OBvuOBl+OCh+OBhgpwbG90KGhvdXNlX2RhdGEkc3FmdF9hYm92ZSwgaG91c2VfZGF0YSRwcmljZSwgeGxpbT1jKC0xMDAwLCAxMjAwMCksIHlsaW09YygtMTAwMCw0ZSs2KSkKYWJsaW5lKGhvdXNlX2xtNiwgY29sPSJyZWQiKQoKc3FmdF9hYm92ZV9zZXE8LWMoLTEwMDA6MTIwMDApCm5ld19kYXRhMjwtZGF0YS5mcmFtZShzcWZ0X2Fib3ZlPXNxZnRfYWJvdmVfc2VxKQpjb25mX2ludGVydmFsPC1wcmVkaWN0KGhvdXNlX2xtNiwgbmV3ZGF0YSA9IG5ld19kYXRhMiwgaW50ZXJ2YWwgPSAiY29uZmlkZW5jZSIpCmxpbmVzKHNxZnRfYWJvdmVfc2VxLGNvbmZfaW50ZXJ2YWxbLDJdLGNvbD0iYmx1ZSIsbHR5PTIpCmxpbmVzKHNxZnRfYWJvdmVfc2VxLGNvbmZfaW50ZXJ2YWxbLDNdLGNvbD0iYmx1ZSIsbHR5PTIpCgpwcmVkX2ludGVydmFsPC1wcmVkaWN0KGhvdXNlX2xtMSwgbmV3ZGF0YSA9IG5ld19kYXRhMiwgaW50ZXJ2YWwgPSAicHJlZGljdGlvbiIpCmxpbmVz
KHNxZnRfYWJvdmVfc2VxLHByZWRfaW50ZXJ2YWxbLDJdLGNvbD0iZ3JlZW4iLGx0eT0yKQpsaW5lcyhzcWZ0X2Fib3ZlX3NlcSxwcmVkX2ludGVydmFsWywzXSxjb2w9ImdyZWVuIixsdHk9MikKCiMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMKIyMg77yI5Y+C6ICD77yJ5qmf5qKw5a2m57+S55qE44Ki44OX44Ot44O844OBIOWtpue/kuODh+ODvOOCv+OBqOODhuOCueODiOODh+ODvOOCv+OBq+WIhuWJsuOBl+OBpueyvuW6puaknOiovAojIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjCgoj5a2m57+S44OH44O844K/44Go44OG44K544OI44OH44O844K/44Gr5YiG5Ymy44GX44G+44GZCnRyYWluX2lkeDwtc2FtcGxlKGMoMTpkaW0oaG91c2VfZGF0YSlbMV0pLCBzaXplID0gZGltKGhvdXNlX2RhdGEpWzFdKjAuNykKdHJhaW4gPC0gaG91c2VfZGF0YVt0cmFpbl9pZHgsIF0KdGVzdCA8LSBob3VzZV9kYXRhWy10cmFpbl9pZHgsIF0KCm15bW9kZWw8LWxtKHByaWNlIH4gYmVkcm9vbXMgKyBmbG9vcnMgKyB3YXRlcmZyb250ICsgdmlldyArIAogICAgICAgICAgICAgICAgIGNvbmRpdGlvbiArIHlyX2J1aWx0ICsgeXJfcmVub3ZhdGVkICsgc3FmdF9saXZpbmcxNSArIHNxZnRfbG90MTUsIGRhdGE9dHJhaW4pCgp5cHJlZDwtcHJlZGljdChteW1vZGVsLCBuZXdkYXRhID0gdGVzdCkKbXNlPC1zdW0oKHRlc3QkcHJpY2UgLSB5cHJlZCleMikvbGVuZ3RoKHlwcmVkKQptc2UKcm1zZTwtc3FydChtc2UpCnJtc2UK